Individual offenses eported in TOP databases and Prossecutors Office’s Records

Show code
script src = "https://ajax.googleapis.com/ajax/libs/jquery/3.4.1/jquery.min.js"
Show code
 $(document).ready(function() {
    $('body').prepend('<div class=\"zoomDiv\"><img src=\"\" class=\"zoomImg\"></div>');
    // onClick function for all plots (img's)
    $('img:not(.zoomImg)').click(function() {
      $('.zoomImg').attr('src', $(this).attr('src')).css({width: '100%'});
      $('.zoomDiv').css({opacity: '1', width: 'auto', border: '1px solid white', borderRadius: '5px', position: 'fixed', top: '50%', left: '50%', marginRight: '-50%', transform: 'translate(-50%, -50%)', boxShadow: '0px 0px 50px #888888', zIndex: '50', overflow: 'auto', maxHeight: '100%'});
    });
    // onClick function for zoomImg
    $('img.zoomImg').click(function() {
      $('.zoomDiv').css({opacity: '0', width: '0%'}); 
    });
  });
  
Show code
<script src="hideOutput.js"></script> 
Show code
$(document).ready(function() {    
    $chunks = $('.fold');    
    $chunks.each(function () {      // add button to source code chunks     
    if ( $(this).hasClass('s') ) {       
        $('pre.r', this).prepend("<div class=\"showopt\">Show Source</div><br style=\"line-height:22px;\"/>");
            $('pre.r', this).children('code').attr('class', 'folded');     
            }      // add button to output chunks     
        if ( $(this).hasClass('o') ) {       
            $('pre:not(.r)', this).has('code').prepend("<div class=\"showopt\">Show Output</div><br style=\"line-height:22px;\"/>");       
            $('pre:not(.r)', this).children('code:not(r)').addClass('folded');        // add button to plots       
            $(this).find('img').wrap('<pre class=\"plot\"></pre>');       
            $('pre.plot', this).prepend("<div class=\"showopt\">Show Plot</div><br style=\"line-height:22px;\"/>");       
            $('pre.plot', this).children('img').addClass('folded');      
            }   
});    // hide all chunks when document is loaded   
    $('.folded').css('display', 'none')    // function to toggle the visibility   
    $('.showopt').click(function() {     
            var label = $(this).html();     
            if (label.indexOf("Show") >= 0) {       
                $(this).html(label.replace("Show", "Hide"));     
            } else {
              $(this).html(label.replace("Hide", "Show"));     
            }     
    $(this).siblings('code, img').slideToggle('fast', 'swing');   
    }); 
}); 


Bring other databases

Show code
### 1.- What is Base_fiscalia_v9, why it has 49,970 IDs & 174,961 rows : paste0("Patients in PO records with unique combination of ID, RUC, end type and date of comission of the offense and offense ", a_tab11_lab_aft_d1 (p= 5,470; RUCs= 5,993; n= 8,291) --> should be only people that commit offenses, not victims)

### 2.- why the difference of 49,970- 49,252????
### 3.- 

Base_fiscalia_v10b_dic_2022<-
  sqldf("SELECT *
  FROM CONS_C1_df_dup_SEP_2020_22_d AS x  
  LEFT JOIN (SELECT *
             FROM Base_fiscalia_v9
             ) AS y
  ON x.hash_key == y.id AND 
  x.dup = 1
  ") #2022-11-25  added dup // #changed the direction to past events, where age at discharge is greater than the age of commission //FEB 2023: WHERE ano_bd_first='2015' OR ano_bd_first='2016' OR ano_bd_first='2017' OR ano_bd_first='2018' OR ano_bd_first='2019'

message(
paste0("First, for each baseline admission of patients in C1 (", format(length(unique(CONS_C1_df_dup_SEP_2020_22_d$hash_key)), big.mark=","), "; n= ",format(nrow(CONS_C1_df_dup_SEP_2020_22_d), big.mark=","),"), we found the PO records in which is found as the offender (p=", format(length(unique(Base_fiscalia_v9$id)), big.mark=","), "; n= ",format(nrow(Base_fiscalia_v9), big.mark=","),") (p= ", format(length(unique(Base_fiscalia_v10b_dic_2022$hash_key)), big.mark=","), "; n= ",format(nrow(Base_fiscalia_v10b_dic_2022), big.mark=","),"). We did not exclude patients in C1 that were registered in databases before 2015 in this step (Feb 2023)")
)

First, for each baseline admission of patients in C1 (85,048; n= 85,048), we found the PO records in which is found as the offender (p=49,970; n= 174,961) (p= 85,048; n= 209,646). We did not exclude patients in C1 that were registered in databases before 2015 in this step (Feb 2023)

Show code
invisible("49,970 patients")

Base_fiscalia_v11b_dic_2022<-
  Base_fiscalia_v10b_dic_2022 %>% 
  #discrepancies in names of variables
  janitor::clean_names() %>%   #janitor::tabyl(!is.na(dob_imp_num))
  #previously recoded, 
  dplyr::select(-dateofbirth_imp, -country, -victim, -id_victim, -crime_code_c , -reg_c, -end_type_2c, -cod_comunadelito, -cod_lugarocurrencia, -sex_imp, -region_delito, -filter, -id)%>%
  plyr::rename(c("dateofbirth_imp_2"="dateofbirth_imp")) %>% 
  dplyr::ungroup() %>% 
  #selected the first row with distinct information regarding patient ID, case ID, crime code.
  dplyr::group_by(hash_key, dateofbirth_imp, caseid, crime_code_group_rec_prof) %>%
  dplyr::slice(1) %>% 
  dplyr::ungroup() %>% 
  dplyr::group_by(hash_key,dateofbirth_imp) %>% 
  summarise(n_off_acq= ifelse(sum(crime_code_group_rec_prof=="Acquisitive", na.rm=T)>0, 1,0), n_off_vio= ifelse(sum(crime_code_group_rec_prof=="Violent", na.rm=T)>0, 1,0), n_off_sud= ifelse(sum(crime_code_group_rec_prof== "Substance-related", na.rm=T)>0, 1,0), n_off_oth=  ifelse(sum(crime_code_group_rec_prof== "Other", na.rm=T)>0, 1,0)) %>% 
  dplyr::ungroup() %>% 
  dplyr::mutate(n_off= rowSums(select(., starts_with("n_")))) 

summarise: now 85,048 rows and 6 columns, one group variable remaining (hash_key)

select: dropped 2 variables (hash_key, dateofbirth_imp)

Show code
message(paste0( "Afterwards, we counted the offenses by type of offense (Acquisitive, violent, substance-related and other) and the total by each user before baseline admisson (p= ", format(length(unique(Base_fiscalia_v11b_dic_2022$hash_key)), big.mark=","), "; cases with records in P.O.= ",format(as.numeric(table(is.na(Base_fiscalia_v10b_dic_2022$dateofbirth_imp))[[2]]), big.mark=","),")." ))

Afterwards, we counted the offenses by type of offense (Acquisitive, violent, substance-related and other) and the total by each user before baseline admisson (p= 85,048; cases with records in P.O.= 35,345).

Time for this code chunk to run: 0.6 minutes

Show code
#Registrar hurtos, robos, violencia intrafamiliar y otras acciones cometidas en las últimas 4 semanas
#Violencia Intrafamiliar (Maltrato físico o psicológico)
CONS_TOP_2022<-
  # 107307
  CONS_TOP%>% 
  # dplyr::left_join(subset(dplyr::mutate(dplyr::group_by(Base_fiscalia_v9, id), hash_rn=row_number())%>% ungroup(), hash_rn==1), by= c("HASH_KEY" = "id"))%>% 
  dplyr::left_join(subset(dplyr::mutate(dplyr::group_by(Base_fiscalia_v11b_dic_2022, hash_key), hash_rn=row_number())%>% ungroup(), hash_rn==1), by= c("HASH_KEY" = "hash_key"))%>% 
  dplyr::mutate(fech_ap_top_num= as.numeric(as.Date(str_sub(as.character(lubridate::parse_date_time(Fecha.Aplicación.TOP, c("%Y-%m-%d"),exact=T)),1,10))))%>% #No parse failures
  dplyr::select(HASH_KEY, fech_ap_top_num, Fecha.Aplicación.TOP, dateofbirth_imp, Hurto, Robo, Venta.Drogas, Riña, Total.VIF, Otro) %>% 
  dplyr::filter(!is.na(HASH_KEY)) %>% 
  dplyr::mutate_at(vars("Hurto", "Robo", "Venta.Drogas", "Riña", "Otro"), ~ifelse(.=="S",1,0)) %>% 
  dplyr::mutate(Total.VIF= ifelse(Total.VIF>0,1,0))%>% 
  dplyr::mutate(tot_off_top = base::rowSums(dplyr::select(.,c(Hurto, Robo, Venta.Drogas, Riña, Total.VIF, Otro)), na.rm = T)) %>% 
  dplyr::mutate(dateofbirth_imp_num= as.numeric(dateofbirth_imp),
                fech_ap_top= lubridate::parse_date_time(Fecha.Aplicación.TOP, c("%Y-%m-%d"),exact=T),
                edad_a_ap_top_num= lubridate::time_length(lubridate::interval(dateofbirth_imp, fech_ap_top),unit="years"),
                edad_b_ap_top_num= (fech_ap_top_num-dateofbirth_imp_num)/365.25,
                edad_a_ap_top_num_lim= edad_a_ap_top_num-(1/12),
                edad_b_ap_top_num_lim= edad_b_ap_top_num-(1/12)) %>% 
  dplyr::select(-dateofbirth_imp, -dateofbirth_imp_num) %>% 
  dplyr::filter(!is.na(edad_a_ap_top_num)) %>% 
  dplyr::group_by(HASH_KEY, edad_a_ap_top_num) %>%
  dplyr::slice(1) %>% 
  dplyr::ungroup()

ungroup: no grouping variables

Show code
message(paste0("Then, we standardized the varibles in TOP (e.g., dates format) and counted self-reported offenses, and also we standardized data with PO records (dates of birth) to get the age when the patient have answered TOP and the age previous [ (1/12)*365.25 ]=",round((1/12)*365.25,0)," days before TOP application. This resulted in ", format(nrow(CONS_TOP_2022), big.mark=","), " rows (combination of different application dates and patients) of ", format(length(unique(CONS_TOP_2022$HASH_KEY)), big.mark=","), " patients."))

Then, we standardized the varibles in TOP (e.g., dates format) and counted self-reported offenses, and also we standardized data with PO records (dates of birth) to get the age when the patient have answered TOP and the age previous [ (1/12)*365.25 ]=30 days before TOP application. This resulted in 54,159 rows (combination of different application dates and patients) of 20,939 patients.

Time for this code chunk to run: 0.1 minutes

Show code
Base_fiscalia_v9_dic_2022<-
Base_fiscalia_v9 %>% 
  dplyr::group_by(id, caseid, end_type, fec_comision_simple, crime_code_c) %>% 
  dplyr::slice(1) %>% 
  dplyr::ungroup()

message(
paste0("We reduced PO records to those that do not have the same sentence for the same offense committed at the same date in the same process (case ID/RUC) (p= ", format(length(unique(Base_fiscalia_v9_dic_2022$id)), big.mark=","), "; n= ",format(nrow(Base_fiscalia_v9_dic_2022), big.mark=","),")")
)

We reduced PO records to those that do not have the same sentence for the same offense committed at the same date in the same process (case ID/RUC) (p= 49,970; n= 166,670)

Show code
#busco en la base de datos de fiscalía, algún delito que haya cometido en el transcurso 
#del último mes anterior a la aplicación del TOP
Base_fiscalia_v13c_dic_2022<-
  sqldf("SELECT *
  FROM CONS_TOP_2022 AS x  
  LEFT JOIN (SELECT *
             FROM Base_fiscalia_v9_dic_2022
             ) AS y
  ON x.HASH_KEY == y.id AND 
  x.edad_a_ap_top_num > y.age_offending_imp AND 
  x.edad_a_ap_top_num_lim < y.age_offending_imp") #2022-11-25  added dup // #changed the direction to past events, where age at discharge is greater than the age of commission

message(
paste0("Of the total of TOP applications (p= ", format(length(unique(CONS_TOP_2022$HASH_KEY)), big.mark=","), "; n= ",format(nrow(CONS_TOP_2022), big.mark=","),"), we looked for offenses comitted in the period of thirty days before (`x.edad_a_ap_top_num_lim`) the application of TOP (p= ", format(length(unique(Base_fiscalia_v13c_dic_2022$HASH_KEY)), big.mark=","), "; n= ",format(nrow(Base_fiscalia_v13c_dic_2022), big.mark=","),"). If this database contains more records, is beacause there may be some patients that had more than one record as a result of more than one application of TOP. This will be resolved in the next step")
)

Of the total of TOP applications (p= 20,939; n= 54,159), we looked for offenses comitted in the period of thirty days before (x.edad_a_ap_top_num_lim) the application of TOP (p= 20,939; n= 54,290). If this database contains more records, is beacause there may be some patients that had more than one record as a result of more than one application of TOP. This will be resolved in the next step

Show code
#Luego, contar por cada combinación de HASH y fecha de aplicación, el número de delitos segun familai de delito
#crime_code_group
Base_fiscalia_v13c_dic_2022_2<-
  Base_fiscalia_v13c_dic_2022 %>% 
  dplyr::select(tidyr::any_of(c("HASH_KEY", "fech_ap_top_num", "Fecha.Aplicación.TOP", "Hurto", "Robo", "Venta.Drogas", "Riña",
                "Total.VIF", "Otro", "tot_off_top", "fech_ap_top", "edad_a_ap_top_num", "edad_b_ap_top_num",
                "edad_a_ap_top_num_lim", "edad_b_ap_top_num_lim", "caseid", "end_type", "fec_comision_simple",
                "crime_code_c", "crime_code_group_rec", "crime_code_group_rec_prof", "age_offending_imp")))
#HASH_KEY
#fech_ap_top_num
#Fecha.Aplicación.TOP
#Hurto
#Robo
#Venta.Drogas
#Riña
#Total.VIF
#Otro
#tot_off_top
#fech_ap_top
#edad_a_ap_top_num
#edad_b_ap_top_num
#edad_a_ap_top_num_lim
#edad_b_ap_top_num_lim
#caseid
#end_type
#fec_comision_simple
#crime_code_c
#crime_code_group_rec_prof
#crime_code_group_rec
#age_offending_imp

invisible("# 131 rows added, why?")
message("Patients than had more than one combination of application date and ID. That means, they answered TOP more than one time, and in at least 2 of these times they had previous records: "); Base_fiscalia_v13c_dic_2022_2 %>% 
    dplyr::count(HASH_KEY, fech_ap_top_num) %>% 
    dplyr::filter(n>1) %>% 
    dplyr::summarise(sum=sum(n), n=n(), tot=sum-n)

Patients than had more than one combination of application date and ID. That means, they answered TOP more than one time, and in at least 2 of these times they had previous records:

  sum   n tot
1 245 114 131
Show code
Base_fiscalia_v13c_dic_2022_3<-
Base_fiscalia_v13c_dic_2022_2 %>% 
  dplyr::ungroup() %>% 
  dplyr::mutate(vio=dplyr::if_else(crime_code_group_rec_prof=="Violent",1,0,0),
                acq=dplyr::if_else(crime_code_group_rec_prof=="Acquisitive",1,0,0),
                sud=dplyr::if_else(crime_code_group_rec_prof=="Substance-related",1,0,0),
                oth=dplyr::if_else(crime_code_group_rec_prof=="Other",1,0,0)) %>% 
  dplyr::mutate(tot= rowSums(select(., c("vio","acq","sud","oth")), na.rm=T)) %>% 
  dplyr::mutate(acq_top= rowSums(select(., c("Hurto","Robo")), na.rm=T)) %>%
  dplyr::mutate(sud_top= rowSums(select(., c("Venta.Drogas")), na.rm=T)) %>%
  dplyr::mutate(vio_top= rowSums(select(., c("Riña", "Total.VIF")), na.rm=T)) %>%
  dplyr::mutate(oth_top= rowSums(select(., c("Otro")), na.rm=T)) %>% 
  dplyr::group_by(HASH_KEY, fech_ap_top_num) %>% 
  dplyr::mutate(vio=sum(vio,na.rm=T), acq=sum(acq,na.rm=T), sud=sum(sud,na.rm=T), 
                oth=sum(oth,na.rm=T), tot=sum(tot,na.rm=T), n=n()) %>%
  dplyr::slice(1) %>% 
  dplyr::ungroup() 

select: dropped 22 variables (HASH_KEY, fech_ap_top_num, Fecha.Aplicación.TOP, Hurto, Robo, …)

select: dropped 25 variables (HASH_KEY, fech_ap_top_num, Fecha.Aplicación.TOP, Venta.Drogas, Riña, …)

select: dropped 27 variables (HASH_KEY, fech_ap_top_num, Fecha.Aplicación.TOP, Hurto, Robo, …) select: dropped 27 variables (HASH_KEY, fech_ap_top_num, Fecha.Aplicación.TOP, Hurto, Robo, …)

select: dropped 29 variables (HASH_KEY, fech_ap_top_num, Fecha.Aplicación.TOP, Hurto, Robo, …)

Show code
#Los que tienen todo pelado en caseid es porque no tienen fecha de comisión en los últimos 30 días
message(
paste0("For each combination of TOP application, count the number of previous offenses in PO records (those with 0s are people that did not committed an offense recorded by PO in the period previous to the application of TOP) (p= ", format(length(unique(Base_fiscalia_v13c_dic_2022_3$HASH_KEY)), big.mark=","), "; n= ",format(nrow(Base_fiscalia_v13c_dic_2022_3), big.mark=","),")")
)

For each combination of TOP application, count the number of previous offenses in PO records (those with 0s are people that did not committed an offense recorded by PO in the period previous to the application of TOP) (p= 20,939; n= 54,159)

Time for this code chunk to run: 0.4 minutes


Comparison

We compared the records of TOP (Hurto,Robo, Venta.Drogas, Riña, Total.VIF, Otro, & tot_off_top) with the records of PO (vio, acq, sud, oth & tot). We grouped TOPs self-reports of offenses related to burglary or robbery into acquisitive (acq_top), the reports of drug-selling into substance related (sud_top) and reports of fights and domestic violence into violent (vio_top) and Other into other (oth_top).

Show code
cat("Recode to binary measures")
Recode to binary measures
Show code
Base_fiscalia_v13c_dic_2022_3$tot_off_top_bin<-
ifelse(Base_fiscalia_v13c_dic_2022_3$tot_off_top>=1,1,0)
Base_fiscalia_v13c_dic_2022_3$tot_bin<-
ifelse(Base_fiscalia_v13c_dic_2022_3$tot>=1,1,0)

Base_fiscalia_v13c_dic_2022_3$vio_bin<-
ifelse(Base_fiscalia_v13c_dic_2022_3$vio>=1,1,0)
Base_fiscalia_v13c_dic_2022_3$acq_bin<-
ifelse(Base_fiscalia_v13c_dic_2022_3$acq>=1,1,0)
Base_fiscalia_v13c_dic_2022_3$sud_bin<-
ifelse(Base_fiscalia_v13c_dic_2022_3$sud>=1,1,0)
Base_fiscalia_v13c_dic_2022_3$oth_bin<-
ifelse(Base_fiscalia_v13c_dic_2022_3$oth>=1,1,0)

Base_fiscalia_v13c_dic_2022_3$acq_top_bin<-
ifelse(Base_fiscalia_v13c_dic_2022_3$acq_top>=1,1,0)
Base_fiscalia_v13c_dic_2022_3$sud_top_bin<-
ifelse(Base_fiscalia_v13c_dic_2022_3$sud_top>=1,1,0)
Base_fiscalia_v13c_dic_2022_3$vio_top_bin<-
ifelse(Base_fiscalia_v13c_dic_2022_3$vio_top>=1,1,0)
Base_fiscalia_v13c_dic_2022_3$oth_top_bin<-
ifelse(Base_fiscalia_v13c_dic_2022_3$oth_top>=1,1,0)

Time for this code chunk to run: 0 minutes

Show code
cat("::: {.panel}", "\n", "[Total]{.panel-name}", "\n")
Total
Show code
cat("**Total records of offenses vs. TOP reported offenses**")
Total records of offenses vs. TOP reported offenses
Show code
paste0("**Records from PO**")
[1] “Records from PO
Show code
message(
paste0(median(Base_fiscalia_v13c_dic_2022_3$tot,na.rm=T), ' (p2.5= ',quantile(Base_fiscalia_v13c_dic_2022_3$tot,.025,na.rm=T),',  p97.5= ',quantile(Base_fiscalia_v13c_dic_2022_3$tot,1-.025, na.rm=T),')')
)

0 (p2.5= 0, p97.5= 0)

Show code
paste0("**Reports from TOP**")
[1] “Reports from TOP
Show code
message(
  paste0(median(Base_fiscalia_v13c_dic_2022_3$tot_off_top,na.rm=T), ' (p 2.5= ',quantile(Base_fiscalia_v13c_dic_2022_3$tot_off_top,.025,na.rm=T),',  p97.5= ',quantile(Base_fiscalia_v13c_dic_2022_3$tot_off_top,1-.025, na.rm=T),')')
)

0 (p 2.5= 0, p97.5= 3)

Show code
pol_p<-
polychor(Base_fiscalia_v13c_dic_2022_3$tot_off_top, Base_fiscalia_v13c_dic_2022_3$tot, std.err=T, ML=T)

pol_p2<-
psych::polychoric(table(Base_fiscalia_v13c_dic_2022_3$tot_off_top, Base_fiscalia_v13c_dic_2022_3$tot))
[1] “You seem to have a table, I will return just one correlation.”
Show code
#rho  : The (matrix) of tetrachoric/polychoric/biserial correlations
#tau  : The normal equivalent of the cutpoints

#for ordinal variables, the same idea holds...but now we have more than one τ-cut to subdivide the normal distribution into regions associated with the different values of the ordinal measure..

# polychoric is appropriate when the manifest ordinal variables came from categorizing latent normal variables & not otherwise. (In practice, it's more like when you are willing to assume thi

#We start with a relative frequency pattern for our contingency table of ordinal data in the lower-left, and we find the optimal solution for correlation and tau-cuts to get us back to a bivariate normal distribution that would have comparable relative joint-frequencies.

cat("**Polychoric correlation: Total records of offenses vs. TOP reported offenses**")
Polychoric correlation: Total records of offenses vs. TOP reported offenses
Show code
message(
capture.output(print(pol_p))[c(2,3)]
)

Polychoric Correlation, ML est. = 0.3197 (0.01333)Test of bivariate normality: Chisquare = 69.02, df = 23, p = 1.719e-06

Show code
cat("**Overdispersion**")
Overdispersion
Show code
message(
round(var(Base_fiscalia_v13c_dic_2022_3$tot_off_top)/mean(Base_fiscalia_v13c_dic_2022_3$tot_off_top),2)
)

1.82

Show code
#The variance is not much greater than the mean, which suggests that we will have over-dispersion in the model.
# obtained with a poisson regression

cat("**Recode**")
Recode
Show code
Base_fiscalia_v13c_dic_2022_3$tot_off_top_rec<-
ifelse(Base_fiscalia_v13c_dic_2022_3$tot_off_top>=4,4,Base_fiscalia_v13c_dic_2022_3$tot_off_top)

cat("**Confusion Matrix**")
Confusion Matrix
Show code
#For more than two classes, these results are calculated comparing each factor level to the remaining levels (i.e. a "one versus all" approach).
# accuracy should be higher than No information rate (naive classifier) in order to be model significant. 
#No information rate tests whether our classifier does better than random assignment
# A hypothesis test is also computed to evaluate whether the overall accuracy rate is greater than the rate of the largest class. P-Value [Acc > NIR]
#Null Error Rate: This is how often you would be wrong if you always predicted the majority class.
# but sometimes the best classifier for a particular application will sometimes have a higher error rate than the null error rate, as in the accuracy paradox
#if the incidence of category A is dominant, being found in 99% of cases, then predicting that every case is category A will have an accuracy of 99%

#Kappa statistics is a measure of how the classification results compare to values assigned by chance. P-value mcnemar, can produce NA values with sparse tables)
pander::pander(caret::confusionMatrix(factor(Base_fiscalia_v13c_dic_2022_3$tot_off_top_rec), factor(Base_fiscalia_v13c_dic_2022_3$tot))) 
  • positive:

  • table:

      0 1 2 3 4
    0 40984 479 35 2 1
    1 7624 309 32 5 2
    2 2442 146 21 4 0
    3 1140 68 7 0 0
    4 819 34 5 0 0
  • overall:

    Table continues below
    Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
    0.7628 0.0401 0.7592 0.7664 0.9788
    AccuracyPValue McnemarPValue
    1 NA
  • byClass:

    Table continues below
      Sensitivity Specificity Pos Pred Value Neg Pred Value
    Class: 0 0.7732 0.5504 0.9875 0.05001
    Class: 1 0.2983 0.8557 0.03876 0.9843
    Class: 2 0.21 0.9521 0.008037 0.9985
    Class: 3 0 0.9776 0 0.9998
    Class: 4 0 0.9842 0 0.9999
    Table continues below
      Precision Recall F1 Prevalence Detection Rate
    Class: 0 0.9875 0.7732 0.8673 0.9788 0.7567
    Class: 1 0.03876 0.2983 0.06861 0.01913 0.005705
    Class: 2 0.008037 0.21 0.01548 0.001846 0.0003877
    Class: 3 0 0 NA 0.0002031 0
    Class: 4 0 0 NA 5.539e-05 0
      Detection Prevalence Balanced Accuracy
    Class: 0 0.7663 0.6618
    Class: 1 0.1472 0.577
    Class: 2 0.04825 0.581
    Class: 3 0.02243 0.4888
    Class: 4 0.01584 0.4921
  • mode: sens_spec

  • dots:

Show code
cat("**Kappa with ordinal weights**")
Kappa with ordinal weights
Show code
pander::pander(irrCAC::kappa2.table(table(ordered(Base_fiscalia_v13c_dic_2022_3$tot_off_top_rec), ordered(Base_fiscalia_v13c_dic_2022_3$tot)),weights = irrCAC::ordinal.weights(0:4)))
coeff.name coeff.val coeff.se coeff.ci coeff.pval
Cohen’s Kappa 0.0349 0.002059 (0.031,0.039) 0e+00
Show code
#really low
#To choose between linear and quadratic weights, ask yourself if the difference between being off by 1 vs. 2 categories is the same as the difference between being off by 2 vs. 3 categories. With linear weights, the penalty is always the same (e.g., 0.33 credit is subtracted for each additional category). However, this is not the case for quadratic weights, where penalties begin mild then grow harsher.
#CONS: Depends on the marginal distribution (prevalence) of the categories

cat("**Plot**")
Plot
Show code
ggstatsplot::ggscatterstats(tot_off_top, tot, data=Base_fiscalia_v13c_dic_2022_3, 
                            type="nonparametric", point.width.jitter=.9, point.height.jitter=.9,
                            xlab="Number of TOP offenses", ylab="Number of PO offenses", 
                            title="Relationship between Total counts of offenses from PO and TOP")

stat_bin() using bins = 30. Pick better value with binwidth.

stat_bin() using bins = 30. Pick better value with binwidth.

Show code
cat(":::", "\n")
Show code
cat("::: {.panel}", "\n", "[Acquisitive]{.panel-name}", "\n")
Acquisitive
Show code
cat("**Total records of offenses vs. TOP reported offenses**")
Total records of offenses vs. TOP reported offenses
Show code
paste0("**Records from PO**")
[1] “Records from PO
Show code
message(
paste0("Mdn= ", median(Base_fiscalia_v13c_dic_2022_3$acq,na.rm=T), ' (p2.5= ',quantile(Base_fiscalia_v13c_dic_2022_3$acq,.025,na.rm=T),',  p97.5= ',quantile(Base_fiscalia_v13c_dic_2022_3$acq,1-.025, na.rm=T),')')
)

Mdn= 0 (p2.5= 0, p97.5= 0)

Show code
paste0("**Reports from TOP**")
[1] “Reports from TOP
Show code
message(
paste0("Mdn= ", median(Base_fiscalia_v13c_dic_2022_3$acq_top,na.rm=T), ' (p 2.5= ',quantile(Base_fiscalia_v13c_dic_2022_3$acq_top,.025,na.rm=T),',  p97.5= ',quantile(Base_fiscalia_v13c_dic_2022_3$acq_top,1-.025, na.rm=T),')')
)

Mdn= 0 (p 2.5= 0, p97.5= 2)

Show code
pol_p_acq<-
polychor(Base_fiscalia_v13c_dic_2022_3$acq_top, Base_fiscalia_v13c_dic_2022_3$acq, std.err=T, ML=T)

pol_p2_acq<-
psych::polychoric(table(Base_fiscalia_v13c_dic_2022_3$acq_top, Base_fiscalia_v13c_dic_2022_3$acq))
[1] “You seem to have a table, I will return just one correlation.”
Show code
cat("**Polychoric correlation: Total records of offenses vs. TOP reported offenses**")
Polychoric correlation: Total records of offenses vs. TOP reported offenses
Show code
message(capture.output(print(pol_p_acq))[c(2,3)])

Polychoric Correlation, ML est. = 0.4372 (0.02007)Test of bivariate normality: Chisquare = 25.55, df = 7, p = 0.0006063

Show code
cat("**Recode**")
Recode
Show code
Base_fiscalia_v13c_dic_2022_3$acq_rec<-
ifelse(Base_fiscalia_v13c_dic_2022_3$acq>=2,2,Base_fiscalia_v13c_dic_2022_3$acq)

cat("**Confusion Matrix**")
Confusion Matrix
Show code
pander::pander(tidy(caret::confusionMatrix(factor(Base_fiscalia_v13c_dic_2022_3$acq_top), factor(Base_fiscalia_v13c_dic_2022_3$acq_rec))))
term class estimate conf.low conf.high p.value
accuracy NA 0.916 0.9137 0.9184 1
kappa NA 0.04983 NA NA NA
mcnemar NA NA NA NA 0
sensitivity 0 0.9213 NA NA NA
specificity 0 0.441 NA NA NA
pos_pred_value 0 0.9952 NA NA NA
neg_pred_value 0 0.04234 NA NA NA
precision 0 0.9952 NA NA NA
recall 0 0.9213 NA NA NA
f1 0 0.9568 NA NA NA
prevalence 0 0.9922 NA NA NA
detection_rate 0 0.9141 NA NA NA
detection_prevalence 0 0.9184 NA NA NA
balanced_accuracy 0 0.6812 NA NA NA
sensitivity 1 0.2519 NA NA NA
specificity 1 0.951 NA NA NA
pos_pred_value 1 0.03692 NA NA NA
neg_pred_value 1 0.9942 NA NA NA
precision 1 0.03692 NA NA NA
recall 1 0.2519 NA NA NA
f1 1 0.06439 NA NA NA
prevalence 1 0.007404 NA NA NA
detection_rate 1 0.001865 NA NA NA
detection_prevalence 1 0.05052 NA NA NA
balanced_accuracy 1 0.6014 NA NA NA
sensitivity 2 0.2609 NA NA NA
specificity 2 0.9691 NA NA NA
pos_pred_value 2 0.003569 NA NA NA
neg_pred_value 2 0.9997 NA NA NA
precision 2 0.003569 NA NA NA
recall 2 0.2609 NA NA NA
f1 2 0.007042 NA NA NA
prevalence 2 0.0004247 NA NA NA
detection_rate 2 0.0001108 NA NA NA
detection_prevalence 2 0.03104 NA NA NA
balanced_accuracy 2 0.615 NA NA NA
Show code
cat("**Kappa with ordinal weights**")
Kappa with ordinal weights
Show code
pander::pander(irrCAC::kappa2.table(table(ordered(Base_fiscalia_v13c_dic_2022_3$acq_top), ordered(Base_fiscalia_v13c_dic_2022_3$acq_rec)),weights = irrCAC::ordinal.weights(0:2)))
coeff.name coeff.val coeff.se coeff.ci coeff.pval
Cohen’s Kappa 0.04807 0.004008 (0.04,0.056) 0e+00
Show code
cat("**Plot**")
Plot
Show code
ggstatsplot::ggscatterstats(acq_top, acq, data=Base_fiscalia_v13c_dic_2022_3, 
                            type="nonparametric", point.width.jitter=.9, point.height.jitter=.9,
                            xlab="Number of TOP offenses", ylab="Number of PO offenses", 
                            title="Relationship between counts of Acquisitive offenses from PO and TOP")

stat_bin() using bins = 30. Pick better value with binwidth. stat_bin() using bins = 30. Pick better value with binwidth.

Show code
cat(":::", "\n")
Show code
cat("::: {.panel}", "\n", "[SUD]{.panel-name}", "\n")
SUD
Show code
cat("**Total records of offenses vs. TOP reported offenses**")
Total records of offenses vs. TOP reported offenses
Show code
paste0("**Records from PO**")
[1] “Records from PO
Show code
message(
paste0("Mdn= ", median(Base_fiscalia_v13c_dic_2022_3$sud,na.rm=T), ' (p2.5= ',quantile(Base_fiscalia_v13c_dic_2022_3$sud,.025,na.rm=T),',  p97.5= ',quantile(Base_fiscalia_v13c_dic_2022_3$sud,1-.025, na.rm=T),')')
)

Mdn= 0 (p2.5= 0, p97.5= 0)

Show code
paste0("**Reports from TOP**")
[1] “Reports from TOP
Show code
message(
paste0("Mdn= ", median(Base_fiscalia_v13c_dic_2022_3$sud_top,na.rm=T), ' (p 2.5= ',quantile(Base_fiscalia_v13c_dic_2022_3$sud_top,.025,na.rm=T),',  p97.5= ',quantile(Base_fiscalia_v13c_dic_2022_3$sud_top,1-.025, na.rm=T),')')
)

Mdn= 0 (p 2.5= 0, p97.5= 1)

Show code
pol_p_sud<-
polychor(Base_fiscalia_v13c_dic_2022_3$sud_top, Base_fiscalia_v13c_dic_2022_3$sud, std.err=T, ML=T)

pol_p2_sud<-
psych::polychoric(table(Base_fiscalia_v13c_dic_2022_3$sud_top, Base_fiscalia_v13c_dic_2022_3$sud))
[1] “You seem to have a table, I will return just one correlation.”
Show code
cat("**Polychoric correlation: Total records of offenses vs. TOP reported offenses**")
Polychoric correlation: Total records of offenses vs. TOP reported offenses
Show code
message(capture.output(print(pol_p_sud))[c(2,3)])

Polychoric Correlation, ML est. = 0.1965 (0.04568)Test of bivariate normality: Chisquare = 1.419, df = 2, p = 0.492

Show code
cat("**Recode**")
Recode
Show code
Base_fiscalia_v13c_dic_2022_3$sud_rec<-
ifelse(Base_fiscalia_v13c_dic_2022_3$sud>=1,1,Base_fiscalia_v13c_dic_2022_3$sud)

cat("**Confusion Matrix**")
Confusion Matrix
Show code
pander::pander(tidy(
caret::confusionMatrix(factor(Base_fiscalia_v13c_dic_2022_3$sud_top), factor(Base_fiscalia_v13c_dic_2022_3$sud_rec))
))
term class estimate conf.low conf.high p.value
accuracy NA 0.9682 0.9667 0.9697 1
kappa NA 0.01342 NA NA NA
mcnemar NA NA NA NA 2.423e-246
sensitivity 0 0.9712 NA NA NA
specificity 0 0.09341 NA NA NA
pos_pred_value 0 0.9969 NA NA NA
neg_pred_value 0 0.0108 NA NA NA
precision 0 0.9969 NA NA NA
recall 0 0.9712 NA NA NA
f1 0 0.9838 NA NA NA
prevalence 0 0.9966 NA NA NA
detection_rate 0 0.9679 NA NA NA
detection_prevalence 0 0.9709 NA NA NA
balanced_accuracy 0 0.5323 NA NA NA
Show code
cat(":::", "\n")
Show code
cat("::: {.panel}", "\n", "[Violent]{.panel-name}", "\n")
Violent
Show code
cat("**Total records of offenses vs. TOP reported offenses**")
Total records of offenses vs. TOP reported offenses
Show code
paste0("**Records from PO**")
[1] “Records from PO
Show code
message(
paste0("Mdn= ", median(Base_fiscalia_v13c_dic_2022_3$vio,na.rm=T), ' (p2.5= ',quantile(Base_fiscalia_v13c_dic_2022_3$vio,.025,na.rm=T),',  p97.5= ',quantile(Base_fiscalia_v13c_dic_2022_3$vio,1-.025, na.rm=T),')')
)

Mdn= 0 (p2.5= 0, p97.5= 0)

Show code
paste0("**Reports from TOP**")
[1] “Reports from TOP
Show code
message(
paste0("Mdn= ", median(Base_fiscalia_v13c_dic_2022_3$vio_top,na.rm=T), ' (p 2.5= ',quantile(Base_fiscalia_v13c_dic_2022_3$vio_top,.025,na.rm=T),',  p97.5= ',quantile(Base_fiscalia_v13c_dic_2022_3$vio_top,1-.025, na.rm=T),')')
)

Mdn= 0 (p 2.5= 0, p97.5= 2)

Show code
pol_p_vio<-
polychor(Base_fiscalia_v13c_dic_2022_3$vio_top, Base_fiscalia_v13c_dic_2022_3$vio, std.err=T, ML=T)

pol_p2_vio<-
psych::polychoric(table(Base_fiscalia_v13c_dic_2022_3$vio_top, Base_fiscalia_v13c_dic_2022_3$vio))
[1] “You seem to have a table, I will return just one correlation.”
Show code
cat("**Polychoric correlation: Total records of offenses vs. TOP reported offenses**")
Polychoric correlation: Total records of offenses vs. TOP reported offenses
Show code
pander::pander(capture.output(print(pol_p_vio))[c(2,3)])
Polychoric Correlation, ML est. = 0.3468 (0.02255) and Test of bivariate normality: Chisquare = 23.84, df = 5, p = 0.0002335
Show code
cat("**Recode**")
Recode
Show code
Base_fiscalia_v13c_dic_2022_3$vio_rec<-
ifelse(Base_fiscalia_v13c_dic_2022_3$vio>=2,2,Base_fiscalia_v13c_dic_2022_3$vio)

cat("**Confusion Matrix**")
Confusion Matrix
Show code
pander::pander(tidy(
  caret::confusionMatrix(factor(Base_fiscalia_v13c_dic_2022_3$vio_top), factor(Base_fiscalia_v13c_dic_2022_3$vio_rec))
))
term class estimate conf.low conf.high p.value
accuracy NA 0.8235 0.8203 0.8267 1
kappa NA 0.01975 NA NA NA
mcnemar NA NA NA NA 0
sensitivity 0 0.8257 NA NA NA
specificity 0 0.564 NA NA NA
pos_pred_value 0 0.9972 NA NA NA
neg_pred_value 0 0.01706 NA NA NA
precision 0 0.9972 NA NA NA
recall 0 0.8257 NA NA NA
f1 0 0.9034 NA NA NA
prevalence 0 0.9947 NA NA NA
detection_rate 0 0.8213 NA NA NA
detection_prevalence 0 0.8236 NA NA NA
balanced_accuracy 0 0.6949 NA NA NA
sensitivity 1 0.4326 NA NA NA
specificity 1 0.8553 NA NA NA
pos_pred_value 1 0.01541 NA NA NA
neg_pred_value 1 0.9965 NA NA NA
precision 1 0.01541 NA NA NA
recall 1 0.4326 NA NA NA
f1 1 0.02976 NA NA NA
prevalence 1 0.005207 NA NA NA
detection_rate 1 0.002253 NA NA NA
detection_prevalence 1 0.1462 NA NA NA
balanced_accuracy 1 0.644 NA NA NA
sensitivity 2 0 NA NA NA
specificity 2 0.9698 NA NA NA
pos_pred_value 2 0 NA NA NA
neg_pred_value 2 0.9999 NA NA NA
precision 2 0 NA NA NA
recall 2 0 NA NA NA
f1 2 NA NA NA NA
prevalence 2 0.0001292 NA NA NA
detection_rate 2 0 NA NA NA
detection_prevalence 2 0.03023 NA NA NA
balanced_accuracy 2 0.4849 NA NA NA
Show code
cat("**Kappa with ordinal weights**")
Kappa with ordinal weights
Show code
pander::pander(
irrCAC::kappa2.table(table(ordered(Base_fiscalia_v13c_dic_2022_3$vio_top), ordered(Base_fiscalia_v13c_dic_2022_3$vio_rec)),weights = irrCAC::ordinal.weights(0:2))
)
coeff.name coeff.val coeff.se coeff.ci coeff.pval
Cohen’s Kappa 0.01956 0.00187 (0.016,0.023) 0e+00
Show code
cat("**Plot**")
Plot
Show code
ggstatsplot::ggscatterstats(vio_top, vio, data=Base_fiscalia_v13c_dic_2022_3, 
                            type="nonparametric", point.width.jitter=.9, point.height.jitter=.9,
                            xlab="Number of TOP offenses", ylab="Number of PO offenses", 
                            title="Relationship between counts of violent offenses from PO and TOP")

stat_bin() using bins = 30. Pick better value with binwidth. stat_bin() using bins = 30. Pick better value with binwidth.

Show code
cat(":::", "\n")
Show code
cat("::: {.panel}", "\n", "[Other]{.panel-name}", "\n")
Other
Show code
cat("**Total records of offenses vs. TOP reported offenses**")
Total records of offenses vs. TOP reported offenses
Show code
paste0("**Records from PO**")
[1] “Records from PO
Show code
message(
paste0("Mdn= ", median(Base_fiscalia_v13c_dic_2022_3$oth,na.rm=T), ' (p2.5= ',quantile(Base_fiscalia_v13c_dic_2022_3$oth,.025,na.rm=T),',  p97.5= ',quantile(Base_fiscalia_v13c_dic_2022_3$oth,1-.025, na.rm=T),')')
)

Mdn= 0 (p2.5= 0, p97.5= 0)

Show code
paste0("**Reports from TOP**")
[1] “Reports from TOP
Show code
message(
paste0("Mdn= ", median(Base_fiscalia_v13c_dic_2022_3$oth_top,na.rm=T), ' (p 2.5= ',quantile(Base_fiscalia_v13c_dic_2022_3$oth_top,.025,na.rm=T),',  p97.5= ',quantile(Base_fiscalia_v13c_dic_2022_3$oth_top,1-.025, na.rm=T),')')
)

Mdn= 0 (p 2.5= 0, p97.5= 1)

Show code
pol_p_oth<-
polychor(Base_fiscalia_v13c_dic_2022_3$oth_top, Base_fiscalia_v13c_dic_2022_3$oth, std.err=T, ML=T)

pol_p2_oth<-
psych::polychoric(table(Base_fiscalia_v13c_dic_2022_3$oth_top, Base_fiscalia_v13c_dic_2022_3$oth))
[1] “You seem to have a table, I will return just one correlation.”
Show code
cat("**Polychoric correlation: Total records of offenses vs. TOP reported offenses**")
Polychoric correlation: Total records of offenses vs. TOP reported offenses
Show code
message(capture.output(print(pol_p_oth))[c(2,3)])

Polychoric Correlation, ML est. = 0.187 (0.0373)Test of bivariate normality: Chisquare = 1.082, df = 2, p = 0.582

Show code
cat("**Recode**")
Recode
Show code
Base_fiscalia_v13c_dic_2022_3$oth_rec<-
ifelse(Base_fiscalia_v13c_dic_2022_3$oth>=1,1,Base_fiscalia_v13c_dic_2022_3$oth)

cat("**Confusion Matrix**")
Confusion Matrix
Show code
pander::pander(tidy(
caret::confusionMatrix(factor(Base_fiscalia_v13c_dic_2022_3$oth_top), factor(Base_fiscalia_v13c_dic_2022_3$oth_rec))
))
term class estimate conf.low conf.high p.value
accuracy NA 0.9648 0.9632 0.9663 1
kappa NA 0.01875 NA NA NA
mcnemar NA NA NA NA 1.654e-199
sensitivity 0 0.9701 NA NA NA
specificity 0 0.08669 NA NA NA
pos_pred_value 0 0.9944 NA NA NA
neg_pred_value 0 0.01707 NA NA NA
precision 0 0.9944 NA NA NA
recall 0 0.9701 NA NA NA
f1 0 0.9821 NA NA NA
prevalence 0 0.994 NA NA NA
detection_rate 0 0.9643 NA NA NA
detection_prevalence 0 0.9697 NA NA NA
balanced_accuracy 0 0.5284 NA NA NA
Show code
cat(":::", "\n")

Time for this code chunk to run: 0.5 minutes

Show code
#
cat("Manually construct measure tables")
#http://rstudio-pubs-static.s3.amazonaws.com/370944_96c386c03ac54ef3bec4535d49e92890.html

confusion_table = table(Base_fiscalia_v13c_dic_2022_3$tot_off_top_bin, Base_fiscalia_v13c_dic_2022_3$tot_bin)
confusion_table[1,1] = 'TN'
confusion_table[1,2] = 'FN'
confusion_table[2,1] = 'FP'
confusion_table[2,2] = 'TP'

get_accuracy <- function(df, predicted, actual){
  confusion_table = table(df[[predicted]],df[[actual]])
  TP = confusion_table[2,2]
  TN = confusion_table[1,1]
  FN = confusion_table[1,2]
  FP = confusion_table[2,1]
  accuracy = round((TP + TN) / sum(TP,FP,TN,FN), 2)
  return(accuracy)
}
get_classification_error_rate <- function(df, predicted, actual){
  confusion_table = table(df[[predicted]],df[[actual]])
  TP = confusion_table[2,2]
  TN = confusion_table[1,1]
  FN = confusion_table[1,2]
  FP = confusion_table[2,1]
  classification_error_rate = round((FP + FN) / sum(TP,FP,TN,FN),2)
  return(classification_error_rate)
}
get_precision <- function(df, predicted, actual){
  confusion_table = table(df[[predicted]],df[[actual]])
  TP = confusion_table[2,2]
  TN = confusion_table[1,1]
  FN = confusion_table[1,2]
  FP = confusion_table[2,1]
  precision = round(TP / (TP + FP), 2)
  return(precision)
}
get_sensitivity <- function(df, predicted, actual){
  confusion_table = table(df[[predicted]],df[[actual]])
  TP = confusion_table[2,2]
  TN = confusion_table[1,1]
  FN = confusion_table[1,2]
  FP = confusion_table[2,1]
  sensitivity = round(TP / (TP + FN), 2)
  return(sensitivity)
}
get_specificity <- function(df, predicted, actual){
  confusion_table = table(df[[predicted]],df[[actual]])
  TP = confusion_table[2,2]
  TN = confusion_table[1,1]
  FN = confusion_table[1,2]
  FP = confusion_table[2,1]
  specificity = round(TN / (TN + FP), 2)
  return(specificity)
}
get_f1_score <- function(df, predicted, actual){
  confusion_table = table(df[[predicted]],df[[actual]])
  TP = confusion_table[2,2]
  TN = confusion_table[1,1]
  FN = confusion_table[1,2]
  FP = confusion_table[2,1]
  
  precision = round(TP / (TP + FP), 2)
  sensitivity = round(TP / (TP + FN), 2)
  f1_score = round((2 * precision * sensitivity) / (precision + sensitivity), 2)
  return(f1_score)
}

caret::confusionMatrix(factor(Base_fiscalia_v13c_dic_2022_3$tot_off_top_bin), factor(Base_fiscalia_v13c_dic_2022_3$tot_bin))

score = data.frame(accuracy=get_accuracy(Base_fiscalia_v13c_dic_2022_3, 'tot_off_top_bin', 'tot_bin'),
      classification_error_rate=get_classification_error_rate(Base_fiscalia_v13c_dic_2022_3, 'tot_off_top_bin', 'tot_bin'),
      precision=get_precision(Base_fiscalia_v13c_dic_2022_3, 'tot_off_top_bin', 'tot_bin'),
      sensitivity=get_sensitivity(Base_fiscalia_v13c_dic_2022_3, 'tot_off_top_bin', 'tot_bin'),
      specificity=get_specificity(Base_fiscalia_v13c_dic_2022_3, 'tot_off_top_bin', 'tot_bin'),
      f1_score=get_f1_score(Base_fiscalia_v13c_dic_2022_3, 'tot_off_top_bin', 'tot_bin'))

#We can also combine precision and recall into an F1 score. This is the harmonic mean of precision and recall.
knitr::kable(score, caption= "Table 1. Properties")# %>% kableExtra::kable_classic()

Time for this code chunk to run: 0 minutes


ROC

For ROC curves, we dichotomized into committing a crime or not against reporting or not (*_bin). Posteriorly, we calculated confidence intervals using bootstrap with 500 replications.

Show code
#Acquisitive  SUD   Violent Other
suppressMessages(suppressWarnings(library(ROCit)))
## Warning: package 'ROCit' was built under R version 3.5.2
message("Error in convertclass(class, reference = negref) :    class must have exactly two unique values)")

Error in convertclass(class, reference = negref) : class must have exactly two unique values)

Show code
cat("::::: {.panelset}", "\n")
Show code
cat("::: {.panel}", "\n", "[Total]{.panel-name}", "\n")
Total
Show code
ROCit_obj <- rocit(score=Base_fiscalia_v13c_dic_2022_3$tot_off_top_bin,class=Base_fiscalia_v13c_dic_2022_3$tot_bin)
plot(ROCit_obj)
Show code
#Del YOuden, el óptimo es...
pander::pander(ciROC(ROCit_obj,nboot = 500))
  • ROC estimation method: empirical
  • Confidence level: 95%
  • FPR: 0, 0.2268 and 1
  • TPR: 0, 0.5504 and 1
  • LowerTPR: 0, 0.5214 and 1
  • UpperTPR: 0, 0.5794 and 1
Show code
#FPR: 1-especificidad
#TPR: Sensibilidad

cat(":::", "\n")
Show code
cat("::: {.panel}", "\n", "[Acquisitive]{.panel-name}", "\n")
Acquisitive
Show code
ROCit_acq <- rocit(score=Base_fiscalia_v13c_dic_2022_3$acq_top_bin,class=Base_fiscalia_v13c_dic_2022_3$acq_bin)
plot(ROCit_acq)
Show code
pander::pander(ciROC(ROCit_acq,nboot = 500))
  • ROC estimation method: empirical
  • Confidence level: 95%
  • FPR: 0, 0.07872 and 1
  • TPR: 0, 0.441 and 1
  • LowerTPR: 0, 0.3937 and 1
  • UpperTPR: 0, 0.4883 and 1
Show code
cat(":::", "\n")
Show code
cat("::: {.panel}", "\n", "[SUD]{.panel-name}", "\n")
SUD
Show code
ROCit_sud <- rocit(score=Base_fiscalia_v13c_dic_2022_3$sud_top_bin,class=Base_fiscalia_v13c_dic_2022_3$sud_bin)
plot(ROCit_sud)
Show code
pander::pander(ciROC(ROCit_sud,nboot = 500))
  • ROC estimation method: empirical
  • Confidence level: 95%
  • FPR: 0, 0.02885 and 1
  • TPR: 0, 0.09341 and 1
  • LowerTPR: 0, 0.05106 and 1
  • UpperTPR: 0, 0.1357 and 1
Show code
cat(":::", "\n")
Show code
cat("::: {.panel}", "\n", "[Violent]{.panel-name}", "\n")
Violent
Show code
ROCit_vio <- rocit(score=Base_fiscalia_v13c_dic_2022_3$vio_top_bin,class=Base_fiscalia_v13c_dic_2022_3$vio_bin)
plot(ROCit_vio)
Show code
pander::pander(ciROC(ROCit_vio,nboot = 500))
  • ROC estimation method: empirical
  • Confidence level: 95%
  • FPR: 0, 0.1743 and 1
  • TPR: 0, 0.564 and 1
  • LowerTPR: 0, 0.5067 and 1
  • UpperTPR: 0, 0.6213 and 1
Show code
cat(":::", "\n")
Show code
cat("::: {.panel}", "\n", "[Other]{.panel-name}", "\n")
Other
Show code
ROCit_oth <- rocit(score=Base_fiscalia_v13c_dic_2022_3$oth_top_bin,class=Base_fiscalia_v13c_dic_2022_3$oth_bin)
plot(ROCit_oth)
Show code
pander::pander(ciROC(ROCit_oth,nboot = 500))
  • ROC estimation method: empirical
  • Confidence level: 95%
  • FPR: 0, 0.02994 and 1
  • TPR: 0, 0.08669 and 1
  • LowerTPR: 0, 0.05592 and 1
  • UpperTPR: 0, 0.1175 and 1
Show code
cat(":::", "\n")
Show code
cat(":::::", "\n")

Time for this code chunk to run: 0.4 minutes


Flowchart

Show code
a_tab1_lab_aft_d<- paste0('Original C1 Dataset \n(p= ', formatC(CONS_C1%>% dplyr::distinct(HASH_KEY)%>% nrow(), format='f', big.mark=',', digits=0), ';\np= ', formatC(nrow(CONS_C1), format='f', big.mark=',', digits=0),')')

a_tab2_lab_aft_d<- paste0('&#8226;Remove duplicated entries\\\\\\l&#8226;Overlapping treatments of patients\\\\\\l&#8226;Intermediate treatment events (continuous referrals)    \\\\\\l')

a_tab3_lab_aft_d<- paste0('      C1 Dataset          \n(p= ', formatC(CONS_C1_df_dup_SEP_2020%>% dplyr::distinct(hash_key)%>% nrow(), format='f', big.mark=',', digits=0), ';\nn= ', formatC(nrow(CONS_C1_df_dup_SEP_2020), format='f', big.mark=',', digits=0),')')


a_tab4_lab_aft_d<- paste0('Original Prosecutors Office\n(p= ',Base_fiscalia_v2%>% dplyr::distinct(rut_enc_saf)%>% nrow() %>% format(big.mark=','), ';\nCauses= ',Base_fiscalia_v2%>% dplyr::distinct(ruc)%>% nrow() %>% format(big.mark=','), ';\nRel.=',Base_fiscalia_v2%>%dplyr::distinct(idrelacion)%>%nrow()%>%format(big.mark=','), ';\nRUC_Vic_Imp=',Base_fiscalia_v2%>%dplyr::mutate(rel=paste0(ruc,"_",idsujeto_victima,"_",idsujeto_imputado,"_","iddelito"))%>%dplyr::distinct(rel)%>%nrow()%>%format(big.mark=','), ';\nn= ',format(nrow(Base_fiscalia_v2),big.mark=","),')')

#crimes committed after study follow-up
a_tab5_lab_aft_d1<-
    paste0("(p= ",format(nrow(unique(subset(Base_fiscalia_v2,fec_comision_simple>as.Date("2019-11-13"),"rut_enc_saf"))),big.mark=","),"; RUCs= ", format(nrow(unique(subset(Base_fiscalia_v2,fec_comision_simple>as.Date("2019-11-13"),"ruc"))), big.mark=","),";n= ", format(nrow(subset(Base_fiscalia_v2,fec_comision_simple>as.Date("2019-11-13"),"rut_enc_saf")), big.mark=","),")")
#erase entries with missing values in fec_comision_simple y termino_relacion_simple
leftovers_Base_fiscalia_v3<-
Base_fiscalia_v3 %>% 
  dplyr::left_join(after_imp_Base_fiscalia_v3_db[,c("rut_enc_saf","imp_birth_date","flowch_age")], by="rut_enc_saf") %>% 
  dplyr::rename("obs"="flowch_age") %>% 
  dplyr::mutate(imp_birth_date=dplyr::case_when(!is.na(imp_birth_date)~imp_birth_date,T~fec_nacimiento_simple))%>% dplyr::mutate(edad_comision_imp=as.numeric(fec_comision_simple-imp_birth_date)/365.25) %>% dplyr::mutate(edad_ter_rel_imp=as.numeric(termino_relacion_simple-imp_birth_date)/365.25) %>%
  #arrange the rut from the first date of comission of a crime, but we are not detecting if he/she is the victim or not
  dplyr::arrange(rut_enc_saf, edad_comision_imp) %>% #566884
  dplyr::filter(dplyr::case_when(!is.na(edad_comision_imp)~T,T~F)) %>% #566644
  dplyr::filter(imp_birth_date=="1900-01-01"|is.na(imp_birth_date))

a_tab5_lab_aft_d12<-
    paste0("(p= ",format(nrow(dplyr::distinct(leftovers_Base_fiscalia_v3,rut_enc_saf)),big.mark=","),"; RUCs= ",
           format(nrow(dplyr::distinct(leftovers_Base_fiscalia_v3,ruc)), big.mark=","),";n= ",
           format(nrow(leftovers_Base_fiscalia_v3), big.mark=","),")")

#minor to 14 years old
a_tab5_lab_aft_d13<-
    paste0("(p= ",format(nrow(dplyr::distinct(dplyr::filter(Base_fiscalia_v4,edad_comision_imp<14),rut_enc_saf)),big.mark=","),"; RUCs= ", format(nrow(dplyr::distinct(dplyr::filter(Base_fiscalia_v4,edad_comision_imp<14),ruc)), big.mark=","),";n= ", format(nrow(dplyr::filter(Base_fiscalia_v4,edad_comision_imp<14)), big.mark=","),")")

#Remove duplicated entries
a_tab5_lab_aft_d2<-
  paste0("(p= ",format(length(unique(eliminated_duplicates$rut_enc_saf)),big.mark=","),"; RUCs= ",
           format(length(unique(eliminated_duplicates$ruc)), big.mark=","),";n= ",
           format(nrow(eliminated_duplicates), big.mark=","),")")
#before 2010
a_tab5_lab_aft_d3<-
    paste0("(p= ",format(nrow(dplyr::distinct(dplyr::filter(Base_fiscalia_v7,fec_comision_simple<"2010-01-01"),rut_enc_saf)),big.mark=","),"; RUCs= ",
           format(nrow(dplyr::distinct(dplyr::filter(Base_fiscalia_v7,fec_comision_simple<"2010-01-01"),ruc)), big.mark=","),";n= ",
           format(nrow(dplyr::filter(Base_fiscalia_v7,fec_comision_simple<"2010-01-01")), big.mark=","),")")
#remove administrative annulment
a_tab5_lab_aft_d4<-
    paste0("(p= ",format(nrow(dplyr::distinct(dplyr::filter(Base_fiscalia_v7,agrupa_terminos=="ANULACI¿N ADMINISTRATIVA"),rut_enc_saf)),big.mark=","),"; RUCs= ",
           format(nrow(dplyr::distinct(dplyr::filter(Base_fiscalia_v7,agrupa_terminos=="ANULACI¿N ADMINISTRATIVA"),ruc)), big.mark=","),";n= ",
           format(nrow(dplyr::filter(Base_fiscalia_v7,agrupa_terminos=="ANULACI¿N ADMINISTRATIVA")), big.mark=","),")")
#remove grouped to another case
a_tab5_lab_aft_d5<-
    paste0("(p= ",format(nrow(dplyr::distinct(dplyr::filter(Base_fiscalia_v7,agrupa_terminos=="AGRUPACI¿N A OTRO CASO"),rut_enc_saf)),big.mark=","),"; RUCs= ",
           format(nrow(dplyr::distinct(dplyr::filter(Base_fiscalia_v7,agrupa_terminos=="AGRUPACI¿N A OTRO CASO"),ruc)), big.mark=","),";n= ",
           format(nrow(dplyr::filter(Base_fiscalia_v7,agrupa_terminos=="AGRUPACI¿N A OTRO CASO")), big.mark=","),")")

a_tab5_lab_aft_d<- paste0('&#8226;Filter crimes committed after study follow-up period',a_tab5_lab_aft_d1,'\\\\\\l&#8226;Remove duplicated entries',a_tab5_lab_aft_d2,'\\\\\\l&#8226;Correct dates (birth, comission of crime, end of judicial proceedings), missing nationality and sex\\\\\\l&#8226;Erase entries with missing values in  comission of crime, end of judicial proceedings',a_tab5_lab_aft_d12,'\\\\\\l&#8226;Erase entries with values in comission of crime when minor to 14 years old after imputation',a_tab5_lab_aft_d13,'\\\\\\l&#8226;Filter crimes committed before study follow-up',a_tab5_lab_aft_d3,'\\\\\\l&#8226;Filter records with cause of end of the proceedings= administrative annulment',a_tab5_lab_aft_d4,'\\\\\\l&#8226;Filter records with cause of end of the proceedings= grouped to another case',a_tab5_lab_aft_d5,'\\\\\\l')

a_tab6_lab_aft_d<- paste0("O.P. Dataset \n(p= ", dplyr::distinct(Base_fiscalia_v8, rut_enc_saf)%>% nrow()%>% formatC(big.mark = ","), ";\nn= ",formatC(nrow(Base_fiscalia_v8),big.mark = ","),")")

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_

#DISCARD not coded as an offender
a_tab7_lab_aft_d1<-
    paste0("(p= ",format(nrow(dplyr::distinct(dplyr::filter(Base_fiscalia_v8,!grepl("SI",encontrado_como_imputado)),rut_enc_saf)),big.mark=","),"; RUCs= ",
           format(nrow(dplyr::distinct(dplyr::filter(Base_fiscalia_v8,!grepl("SI",encontrado_como_imputado)),ruc)), big.mark=","),";n= ",
           format(nrow(dplyr::filter(Base_fiscalia_v8,!grepl("SI",encontrado_como_imputado))), big.mark=","),")")

#FILTER IF THE PATIENT RECIEVES FOR THE RELATIONSHIP AMONG THOSE THAT WERE OFFENDERS
# end of proceeding  === SIMILAR TO Base_fiscalia_v9
a_tab7_lab_aft_d2<-
      paste0("(p= ",format(nrow(dplyr::distinct(dplyr::filter(Base_fiscalia_v8, grepl("SI",encontrado_como_imputado)) %>%
dplyr::mutate(filter=dplyr::case_when(grepl("REPARATORIO|CONDICIONAL",toupper(agrupa_terminos)) & is.na(gls_mottermino)~1, grepl("REPARATORIO|SENTENCIA DEFINITIVA CONDENATORIA|240|MONIT", toupper(agrupa_terminos), ignore.case=F)~2, T~0))%>% dplyr::filter(filter==0),rut_enc_saf)),big.mark=","),"; RUCs= ",
           format(nrow(dplyr::distinct(dplyr::filter(Base_fiscalia_v8, grepl("SI",encontrado_como_imputado)) %>%
dplyr::mutate(filter=dplyr::case_when(grepl("REPARATORIO|CONDICIONAL",toupper(agrupa_terminos)) & is.na(gls_mottermino)~1, grepl("REPARATORIO|SENTENCIA DEFINITIVA CONDENATORIA|240|MONIT", toupper(agrupa_terminos), ignore.case=F)~2, T~0))%>% dplyr::filter(filter==0),ruc)), big.mark=","),";n= ",
           format(nrow(dplyr::filter(Base_fiscalia_v8, grepl("SI",encontrado_como_imputado)) %>%
dplyr::mutate(filter=dplyr::case_when(grepl("REPARATORIO|CONDICIONAL",toupper(agrupa_terminos)) & is.na(gls_mottermino)~1, grepl("REPARATORIO|SENTENCIA DEFINITIVA CONDENATORIA|240|MONIT", toupper(agrupa_terminos), ignore.case=F)~2, T~0))%>% dplyr::filter(filter==0)), big.mark=","),")")

CONS_TOP_2022_anti<-
  # 107307
  CONS_TOP%>% 
  dplyr::left_join(subset(dplyr::mutate(dplyr::group_by(Base_fiscalia_v11b_dic_2022, hash_key), hash_rn=row_number())%>% ungroup(), hash_rn==1), by= c("HASH_KEY" = "hash_key"))%>% 
  dplyr::mutate(fech_ap_top_num= as.numeric(as.Date(str_sub(as.character(lubridate::parse_date_time(Fecha.Aplicación.TOP, c("%Y-%m-%d"),exact=T)),1,10))))%>% #No parse failures
  dplyr::select(HASH_KEY, fech_ap_top_num, Fecha.Aplicación.TOP, dateofbirth_imp, Hurto, Robo, Venta.Drogas, Riña, Total.VIF, Otro) %>% 
  dplyr::filter(!is.na(HASH_KEY)) %>% 
  dplyr::mutate_at(vars("Hurto", "Robo", "Venta.Drogas", "Riña", "Otro"), ~ifelse(.=="S",1,0)) %>% 
  dplyr::mutate(Total.VIF= ifelse(Total.VIF>0,1,0))%>% 
  dplyr::mutate(tot_off_top = base::rowSums(dplyr::select(.,c(Hurto, Robo, Venta.Drogas, Riña, Total.VIF, Otro)), na.rm = T)) %>% 
  dplyr::mutate(dateofbirth_imp_num= as.numeric(dateofbirth_imp),
                fech_ap_top= lubridate::parse_date_time(Fecha.Aplicación.TOP, c("%Y-%m-%d"),exact=T),
                edad_a_ap_top_num= lubridate::time_length(lubridate::interval(dateofbirth_imp, fech_ap_top),unit="years"),
                edad_b_ap_top_num= (fech_ap_top_num-dateofbirth_imp_num)/365.25,
                edad_a_ap_top_num_lim= edad_a_ap_top_num-(1/12),
                edad_b_ap_top_num_lim= edad_b_ap_top_num-(1/12)) %>% 
  dplyr::select(-dateofbirth_imp, -dateofbirth_imp_num) %>% 
  dplyr::filter(!is.na(edad_a_ap_top_num)) %>% 
  dplyr::group_by(HASH_KEY, edad_a_ap_top_num) %>% 
  dplyr::slice(-1) %>% 
  dplyr::ungroup()

a_tab7_lab_aft_d25<-
        paste0("(p= ",format(length(unique(CONS_TOP_2022_anti$HASH_KEY)),big.mark=","),"; n= ",
           format(length(CONS_TOP_2022_anti$HASH_KEY), big.mark=","),")")


a_tab7_lab_aft_d3<-
        paste0("(p= ",format(nrow(dplyr::distinct(dplyr::anti_join(CONS_C1_df_dup_SEP_2020, CONS_C1_df_dup_SEP_2020_22_d, by=c("hash_key","dup")),hash_key)),big.mark=","),"; n= ",
           format(nrow(dplyr::anti_join(CONS_C1_df_dup_SEP_2020, CONS_C1_df_dup_SEP_2020_22_d, by=c("hash_key","dup"))), big.mark=","),")")

a_tab8_lab_aft_d <- paste0('Offenses previous\nto the first admission\n',"(p= ",format(length(unique(Base_fiscalia_v10b_dic_2022[,"hash_key"])),big.mark=","),"; RUCs= ",
           format(length(unique(Base_fiscalia_v10b_dic_2022[,"caseid"])), big.mark=","),";\nn= ",
           format(length(Base_fiscalia_v10b_dic_2022[,"caseid"]), big.mark=","),")")

a_tab9_lab_aft<- paste0('&#8226;Discard observations coded as victims rather than ofenders ',tab7_lab_aft_d1,'\\\\\\l&#8226;Discard observations depending on the values of the end of the proceedings among offenders ',tab7_lab_aft_d2,'\\\\\\l&#8226;Discard TOP applications with duplicated dates and user ',a_tab7_lab_aft_d25,'\\\\\\l&#8226;Get the first treatment of each user ',a_tab7_lab_aft_d3,'\\\\\\l')

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#//  ON x.hash_key == y.id AND   x.edad_al_egres_imp > y.age_offending_imp AND x.dup = 1"

library(DiagrammeR) 
plot_merge_flowchart_excercise<-
  grViz("digraph flowchart {
        fontname='Comic Sans MS'
        # node definitions with substituted label text
        node [shape = rectangle,fontsize = 9]        
        tab1 [label = '@@1']
        blank [label = '', width = 0.0001, height = 0.0001]
        tab2 [label = '@@2',fontsize = 7]
        tab3 [label = '@@3']
        
        tab4 [label = '@@4',fontsize = 8]
        blank2 [label = '', width = 0.0001, height = 0.0001]
        tab5 [label = '@@5',fontsize = 7]
        tab6 [label= '@@6']
        
        blank3 [label = '', width = 0.0001, height = 0.0001]
        blank4 [label = '', width = 0.0001, height = 0.0001]      
        blank5 [label = '', width = 0.0001, height = 0.0001] 
        
        blank6 [label = '', width = 0.0001, height = 0.0001]
        tab7 [label = '@@7',fontsize = 7]
        tab8 [label= '@@8']
        
        # edge definitions with the node IDs
        rankdir='TB'; rank= same; tab1 -> blank [arrowhead = none,label='        Data wrangling and normalization process',fontsize = 8];
        rankdir='TB'; rank= same; tab1; tab3;   
        blank -> tab2;
                    subgraph {
                rank = same; tab2; blank;
                    }
        rankdir='TB'; rank= same; blank -> tab3;      
              
        tab4 -> blank2 [arrowhead = none,label='        Data wrangling and normalization process',fontsize = 8];
        blank2 -> tab5
        blank2 -> tab6
        blank4 -> blank6 [arrowhead= none, label='  &#10198;']
        blank6 -> tab7 
                    subgraph {
          rank = same; blank6; tab7;
                    }
        # bring_db1_a                    
        blank6 -> tab8 [label= '        Merge records where discharge age is greater than age of offenses', fontsize = 8] 
                    subgraph {
          rank = same; tab5; blank2;
                    }
                    subgraph {
        rank= same; tab3 -> blank3 -> blank4 -> blank5 -> tab6 [arrowhead= none]
                    }
        }
                    subgraph {
          rank = same; tab3; tab6;
                    }
                    subgraph {
          rank = same; tab1; tab4;
                    }         
                    subgraph {
          rank = same; tab2; tab5;
                    } 
                    subgraph {
          rank = same; tab1; tab3;
          rankdir=TB
          rank=same
                    }
                    subgraph {
          rank = same; tab4; tab6;
          rankdir=TB
          rank=same
                    }                     
  
        [1]:  a_tab1_lab_aft_d
        [2]:  a_tab2_lab_aft_d
        [3]:  a_tab3_lab_aft_d
        [4]:  a_tab4_lab_aft_d
        [5]:  a_tab5_lab_aft_d
        [6]:  a_tab6_lab_aft_d
        [7]:  a_tab9_lab_aft
        [8]:  a_tab8_lab_aft_d
        ", width = 1200,
          height = 900)

DPI = 1200
WidthCM = 11
HeightCM = 8

plot_merge_flowchart_excercise
Show code
library(DiagrammeRsvg)

plot_merge_flowchart_excercise %>%
  export_svg %>% charToRaw %>% rsvg::rsvg_pdf("./_figs/_flowchart_comm_DAD.pdf")
plot_merge_flowchart_excercise %>% DiagrammeRsvg::export_svg()%>%charToRaw %>% rsvg::rsvg(width = WidthCM *(DPI/2.54), height = HeightCM *(DPI/2.54)) %>% png::writePNG("./_figs/_flowchart_comm_DAD.png")

htmlwidgets::saveWidget(plot_merge_flowchart_excercise, "./_figs/_flowchart_comm_DAD.html")
webshot::webshot("./_figs/_flowchart_comm_DAD.html", "./_figs/_flowchart_comm_DAD.png",vwidth = 1200, vheight = 900, zoom = 3)
Show code
webshot::webshot("./_figs/_flowchart_comm_DAD.html", "./_figs/_flowchart_comm_DAD_alt.pdf")

Time for this code chunk to run: 0 minutes

Show code
a_tab10_lab_aft_d<- paste0("TOP Dataset \n(p= ", dplyr::distinct(CONS_TOP, HASH_KEY)%>% nrow()%>% formatC(big.mark = ","),";\nn= ",formatC(nrow(CONS_TOP),big.mark = ","),")")

#bring_db3
a_tab11_lab_aft_d1<- 
    paste0( '(p= ',format(nrow(dplyr::distinct(dplyr::group_by(Base_fiscalia_v9, id, caseid, end_type, fec_comision_simple, crime_code_c) %>% dplyr::slice(-1) %>% dplyr::ungroup(),id)), big.mark=","), '; RUCs= ', format(nrow(dplyr::distinct(dplyr::group_by(Base_fiscalia_v9, id, caseid, end_type, fec_comision_simple, crime_code_c) %>% dplyr::slice(-1) %>% dplyr::ungroup(),caseid)), big.mark=","), '; n= ', format(nrow(dplyr::group_by(Base_fiscalia_v9, id, caseid, end_type, fec_comision_simple, crime_code_c) %>% dplyr::slice(-1) %>% dplyr::ungroup()), big.mark=",") , ')'
    )
message(
paste0("Patients in PO records with unique combination of ID, RUC, end type and date of comission of the offense and offense ", a_tab11_lab_aft_d1)
)

a_tab11_lab_aft_d<- paste0('&#8226;Count events such as shoplifting, theft, domestic violence, drug selling, fights, or related behavior in the last four weeks','\\\\\\l',
                           '&#8226;Get PO data with more than one record with the same case ID, sentence, date of offense and type of crime ', a_tab11_lab_aft_d1,'\\\\\\l')

a_tab12_lab_aft_d<- 
    paste0('Merged database\n(p= ',format(nrow(dplyr::distinct(Base_fiscalia_v13c_dic_2022_3,HASH_KEY)), big.mark=","), '; n= ', format(nrow(Base_fiscalia_v13c_dic_2022_3), big.mark=",") , ')'
    )


plot_merge_flowchart_excercise2<-
  grViz("digraph flowchart {
        fontname='Comic Sans MS'
        # edge definitions with the node IDs
        # node definitions with substituted label text
        node [shape = rectangle, fontsize = 9]        
        tab1 [label = '@@1', fontsize = 7]
        tab2 [label = '@@2', fontsize = 7]
        blank [label = '', width = 0.0001, height = 0.0001]
        blanka [label = '', width = 0.0001, height = 0.0001]
        blankb [label = '', width = 0.0001, height = 0.0001]
        
        # bring_db2       
          subgraph {
            rank = same; tab1 -> blanka -> blank -> blankb -> tab2 [arrowhead= none]
          }
        tab3 [label = '@@3',fontsize = 6]
        blank2 [label = '', width = 0.0001, height = 0.0001]
        blank -> blank2 [arrowhead= none, label='  &#10198;']
        subgraph {
            rank = same; blank2 -> tab3
          }
        tab4 [label = '@@4',fontsize = 9]
        blank2 -> tab4 [label='        Select records of offenses committed before the application of TOP but\n        at least the last month previous to the application of TOP \\\\\\l',fontsize = 7];
  }

        [1]:  a_tab8_lab_aft_d
        [2]:  a_tab10_lab_aft_d
        [3]:  a_tab11_lab_aft_d
        [4]:  a_tab12_lab_aft_d
        [5]:  ''
        [6]:  ''
        [7]:  ''
        [8]:  ''
        ", width = 1200,
          height = 900)

plot_merge_flowchart_excercise2
Show code
plot_merge_flowchart_excercise2 %>%
  export_svg %>% charToRaw %>% rsvg::rsvg_pdf("./_figs/_flowchart2_comm_DAD.pdf")
plot_merge_flowchart_excercise2 %>% DiagrammeRsvg::export_svg()%>%charToRaw %>% rsvg::rsvg(width = WidthCM *(DPI/2.54), height = HeightCM *(DPI/2.54)) %>% png::writePNG("./_figs/_flowchart2_comm_DAD.png")

htmlwidgets::saveWidget(plot_merge_flowchart_excercise2, "./_figs/_flowchart2_comm_DAD.html")
webshot::webshot("./_figs/_flowchart2_comm_DAD.html", "./_figs/_flowchart2_comm_DAD.png",vwidth = 1200, vheight = 900, zoom = 3)
Show code
webshot::webshot("./_figs/_flowchart2_comm_DAD.html", "./_figs/_flowchart2_comm_DAD_alt.pdf")

Time for this code chunk to run: 0 minutes


Session Info

Show code
message(Sys.getenv("R_LIBS_USER"))

C:/Users/CISS Fondecyt/Mi unidad/Alvacast/SISTRAT 2022 (github)/renv/library/R-4.1/x86_64-w64-mingw32;C:/Users/CISS Fondecyt/AppData/Local/Temp/Rtmpoxqstd/renv-system-library

Show code
[1] "2023-02-18"
Show code
message(paste0("Editor context: ", path))

Editor context: C:/Users/CISS Fondecyt/Mi unidad/Alvacast/SISTRAT 2022 (github)

Show code
if (grepl("CISS Fondecyt",rstudioapi::getSourceEditorContext()$path)==T){
    save.image("C:/Users/CISS Fondecyt/Mi unidad/Alvacast/SISTRAT 2022 (github)/14_alt.RData")
  } else if (grepl("andre",rstudioapi::getSourceEditorContext()$path)==T){
    save.image("C:/Users/andre/Desktop/SUD_CL/14_alt.RData")
  } else if (grepl("E:",rstudioapi::getSourceEditorContext()$path)==T){
    save.image("E:/Mi unidad/Alvacast/SISTRAT 2022 (github)/14_alt.RData")
  } else {
    save.image(paste0(sub("2019","2022",sub("SUD_CL","",path)),"14_alt.RData"))
  }
#load(paste0(dirname(rstudioapi::getSourceEditorContext()$path),"/14_alt.RData"))

sesion_info <- devtools::session_info()
dplyr::select(
  tibble::as_tibble(sesion_info$packages),
  c(package, loadedversion, source)
) %>% 
  DT::datatable(filter = 'top', colnames = c('Row number' =1,'Variable' = 2, 'Percentage'= 3),
              caption = htmltools::tags$caption(
        style = 'caption-side: top; text-align: left;',
        '', htmltools::em('Packages')),
      options=list(
initComplete = htmlwidgets::JS(
        "function(settings, json) {",
        "$(this.api().tables().body()).css({
            'font-family': 'Helvetica Neue',
            'font-size': '50%', 
            'code-inline-font-size': '15%', 
            'white-space': 'nowrap',
            'line-height': '0.75em',
            'min-height': '0.5em'
            });",#;
        "}")))

Time for this code chunk to run: 0.1 minutes

Show code
#put name of the file
file<- "fiscalia_ags_jan_2023_match_SENDA.dta"

export_lab_stata_merge<-
  tibble::rownames_to_column(data.frame(Hmisc::label(dplyr::select(Base_fiscalia_v13,  all_of(c("hash_key", cont_vars_desc, cat_vars_desc, cat_vars_desc_off, vars_desc, "fech_ing_num_1", "fech_egres_imp", "cut_com_del", "cut_fec_nac","offender_d", "comuna_residencia_cod_rec", "porc_pobr", "Clasificación"))))))%>% data.frame() %>%
  dplyr::rename("code" = !!names(.[1]), "label" = !!names(.[2]))%>% data.frame()%>%
  dplyr::mutate(first= "cap noi label variable")%>%
  dplyr::mutate(final= paste0(first, " ",code,' "',label,'"'))%>%
  dplyr::select(-code,-label,-first)%>%
  dplyr::rename("*clear all"="final") %>%
  rbind(paste0('cap noi save "', gsub('/', '\\', path, fixed=T),'\\',file,'", replace'))%>%
  rbind(paste0('cap noi save "', gsub('/', '\\', path, fixed=T),'\\',file,'", replace'))

rbind(paste0('cap noi use "', gsub('/', '\\', path, fixed=T),'\\',file,'", clear'),export_lab_stata_merge) %>% knitr::kable("markdown")

write.table(rbind(paste0('cap noi use "', gsub('/', '\\', path, fixed=T),'\\',file,'", clear'),export_lab_stata_merge), file = paste0(path,"/_label_var_to_stata_jan2023.do"), sep = "",row.names = FALSE, quote = FALSE, fileEncoding="UTF-8")

Time for this code chunk to run: 0 minutes


Show code
*should be in the same folder of the .Rmd to work
cap noi do _label_var_to_stata_jan2023
Error in running command st

Time for this code chunk to run: 0 minutes